The goal of this analysis is to develop a regression to predict water temperature when data are missing or the time series is incomplete.
Currently this analysis is performed for Feather River and Yuba River. The resulting dataset with predicted values are saved and integrated in the development of a water temperature dataset.
Butte Creek is used to build the regression models because the time series is complete and the data are high quality.
Plot of mean temp for Feather River LFC and Butte Creek
## `geom_smooth()` using formula = 'y ~ x'
Plot of min temp for Feather River LFC and Butte Creek
## `geom_smooth()` using formula = 'y ~ x'
Plot of max temp for Feather River LFC and Butte Creek
## `geom_smooth()` using formula = 'y ~ x'
# LFC regression and predictions
# MEAN Regression
split <-rsample::initial_split(feather_lfc_regression_data_mean, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_lfc_mod_mean <- lm(temp ~ date + butte_temp, data = train)
summary(feather_lfc_mod_mean)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8490 -0.8254 0.0363 0.8137 6.0039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.271e+00 1.117e+00 -2.930 0.00344 **
## date 5.582e-04 5.938e-05 9.399 < 2e-16 ***
## butte_temp 4.184e-01 6.002e-03 69.708 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.336 on 1565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7612, Adjusted R-squared: 0.7609
## F-statistic: 2495 on 2 and 1565 DF, p-value: < 2.2e-16
test_predict <- predict(feather_lfc_mod_mean, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.0857866
# MIN Regression
split <-rsample::initial_split(feather_lfc_regression_data_min, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_lfc_mod_min <- lm(temp ~ date + butte_temp, data = train)
summary(feather_lfc_mod_min)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6976 -0.8362 0.0226 0.7779 6.1884
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.624e+00 1.123e+00 -2.337 0.0196 *
## date 5.267e-04 5.978e-05 8.811 <2e-16 ***
## butte_temp 3.921e-01 6.698e-03 58.539 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.346 on 1565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6944, Adjusted R-squared: 0.694
## F-statistic: 1778 on 2 and 1565 DF, p-value: < 2.2e-16
test_predict <- predict(feather_lfc_mod_min, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.08469061
# MAX Regression
split <-rsample::initial_split(feather_lfc_regression_data_max, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_lfc_mod_max <- lm(temp ~ date + butte_temp, data = train)
summary(feather_lfc_mod_max)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3015 -0.8102 0.0117 0.8065 6.1645
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.732e+00 1.214e+00 -3.075 0.00214 **
## date 5.820e-04 6.453e-05 9.019 < 2e-16 ***
## butte_temp 4.356e-01 5.923e-03 73.546 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.449 on 1565 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.779, Adjusted R-squared: 0.7787
## F-statistic: 2758 on 2 and 1565 DF, p-value: < 2.2e-16
test_predict <- predict(feather_lfc_mod_max, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.07865859
# Predictions
feather_gap_predicted_mean_lfc <- predict(feather_lfc_mod_mean, feather_gap_mean)
feather_lfc_mean_predicted <- feather_gap_mean |>
mutate(value = feather_gap_predicted_mean_lfc,
statistic = "mean") |>
select(date, value, statistic)
ggplot(feather_lfc_mean_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_min_lfc <- predict(feather_lfc_mod_min, feather_gap_min)
feather_lfc_min_predicted <- feather_gap_min |>
mutate(value = feather_gap_predicted_min_lfc,
statistic = "min") |>
select(date, value, statistic)
ggplot(feather_lfc_min_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_max_lfc <- predict(feather_lfc_mod_max, feather_gap_max)
feather_lfc_max_predicted <- feather_gap_max |>
mutate(value = feather_gap_predicted_max_lfc,
statistic = "max") |>
select(date, value, statistic)
ggplot(feather_lfc_max_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_lfc <- bind_rows(feather_lfc_max_predicted,
feather_lfc_mean_predicted,
feather_lfc_min_predicted) |>
mutate(stream = "feather river",
site_group = "upper feather lfc") %>%
pivot_wider(id_cols = c(date, stream, site_group), values_from = "value", names_from = "statistic") |>
rename(mean_i = mean,
max_i = max,
min_i = min) |>
full_join(feather_lfc_format |>
pivot_wider(id_cols = c(stream, date, gage_agency, gage_number, site_group), values_from = "value", names_from = "statistic")) |>
mutate(gage_agency = ifelse(is.na(mean), "interpolated", gage_agency),
gage_number = ifelse(is.na(mean), "interpolated", gage_number),
mean = ifelse(is.na(mean), mean_i, mean),
min = ifelse(is.na(min), min_i, min),
max = ifelse(is.na(max), max_i, max)) |>
select(-c(min_i, mean_i, max_i)) |>
pivot_longer(cols = mean:min, values_to = "value", names_to = "statistic") |>
group_by(stream, date, statistic, gage_agency, gage_number, site_group) |> glimpse()
## Joining with `by = join_by(date, stream, site_group)`
## Rows: 26,367
## Columns: 7
## Groups: stream, date, statistic, gage_agency, gage_number, site_group [26,367]
## $ date <date> 1999-12-31, 1999-12-31, 1999-12-31, 2000-01-01, 2000-01-0…
## $ stream <chr> "feather river", "feather river", "feather river", "feathe…
## $ site_group <chr> "upper feather lfc", "upper feather lfc", "upper feather l…
## $ gage_agency <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ gage_number <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ statistic <chr> "mean", "max", "min", "mean", "max", "min", "mean", "max",…
## $ value <dbl> 4.852591, 4.735839, 5.028812, 4.922884, 5.172034, 4.833300…
ggplot() +
geom_line(data = feather_gap_lfc, aes(x = date, y = value, color = statistic)) +
theme_minimal()
## Warning: Removed 3 rows containing missing values (`geom_line()`).
Plot of mean temp for Feather River HFC and Butte Creek
# plot for mean
ggplot(data = feather_hfc_regression_data_mean, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of min temp for Feather River HFC and Butte Creek
# plot for mean
ggplot(data = feather_hfc_regression_data_min, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of max temp for Feather River HFC and Butte Creek
# plot for mean
ggplot(data = feather_hfc_regression_data_max, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# HFC regression and predictions
# MEAN Regression
split <-rsample::initial_split(feather_hfc_regression_data_mean, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_hfc_mod_mean <- lm(temp ~ date + butte_temp, data = train)
summary(feather_hfc_mod_mean)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.023 -1.358 -0.122 1.351 4.850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.9592732 2.3046957 5.189 2.56e-07 ***
## date -0.0004860 0.0001811 -2.684 0.00741 **
## butte_temp 0.6784402 0.0132344 51.263 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.117 on 1001 degrees of freedom
## Multiple R-squared: 0.7242, Adjusted R-squared: 0.7236
## F-statistic: 1314 on 2 and 1001 DF, p-value: < 2.2e-16
test_predict <- predict(feather_hfc_mod_mean, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] NA
# MIN Regression
split <-rsample::initial_split(feather_hfc_regression_data_min, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_hfc_mod_min <- lm(temp ~ date + butte_temp, data = train)
summary(feather_hfc_mod_min)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9250 -1.1006 0.0304 1.2519 4.3854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.6190437 2.1195204 7.369 3.59e-13 ***
## date -0.0007780 0.0001666 -4.670 3.42e-06 ***
## butte_temp 0.6759568 0.0134890 50.112 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.988 on 1000 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7152, Adjusted R-squared: 0.7147
## F-statistic: 1256 on 2 and 1000 DF, p-value: < 2.2e-16
test_predict <- predict(feather_hfc_mod_min, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.1148937
# MAX Regression
split <-rsample::initial_split(feather_hfc_regression_data_max, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
feather_hfc_mod_max <- lm(temp ~ date + butte_temp, data = train)
summary(feather_hfc_mod_max)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.049 -1.406 -0.159 1.845 5.769
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.0119891 2.9825730 5.704 1.54e-08 ***
## date -0.0007998 0.0002344 -3.412 0.000671 ***
## butte_temp 0.6028812 0.0150607 40.030 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.708 on 1000 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6158, Adjusted R-squared: 0.615
## F-statistic: 801.2 on 2 and 1000 DF, p-value: < 2.2e-16
test_predict <- predict(feather_hfc_mod_max, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.1154672
# Predictions
feather_gap_predicted_mean_hfc <- predict(feather_hfc_mod_mean, feather_gap_mean)
feather_hfc_mean_predicted <- feather_gap_mean |>
mutate(value = feather_gap_predicted_mean_hfc,
statistic = "mean") |>
select(date, value, statistic)
ggplot(feather_hfc_mean_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_min_hfc <- predict(feather_hfc_mod_min, feather_gap_min)
feather_hfc_min_predicted <- feather_gap_min |>
mutate(value = feather_gap_predicted_min_hfc,
statistic = "min") |>
select(date, value, statistic)
ggplot(feather_hfc_min_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_predicted_max_hfc <- predict(feather_hfc_mod_max, feather_gap_max)
feather_hfc_max_predicted <- feather_gap_max |>
mutate(value = feather_gap_predicted_max_hfc,
statistic = "max") |>
select(date, value, statistic)
ggplot(feather_hfc_max_predicted, aes(x = date, y = value)) +
geom_line()
feather_gap_hfc <- bind_rows(feather_hfc_max_predicted,
feather_hfc_mean_predicted,
feather_hfc_min_predicted) |>
mutate(stream = "feather river",
site_group = "upper feather hfc") %>%
pivot_wider(id_cols = c(date, stream, site_group), values_from = "value", names_from = "statistic") |>
rename(mean_i = mean,
max_i = max,
min_i = min) |>
full_join(feather_hfc_format |>
pivot_wider(id_cols = c(stream, date, gage_agency, gage_number, site_group), values_from = "value", names_from = "statistic")) |>
mutate(gage_agency = ifelse(is.na(mean), "interpolated", gage_agency),
gage_number = ifelse(is.na(mean), "interpolated", gage_number),
mean = ifelse(is.na(mean), mean_i, mean),
min = ifelse(is.na(min), min_i, min),
max = ifelse(is.na(max), max_i, max)) |>
select(-c(min_i, mean_i, max_i)) |>
pivot_longer(cols = mean:min, values_to = "value", names_to = "statistic") |>
group_by(stream, date, statistic, gage_agency, gage_number, site_group) |> glimpse()
## Joining with `by = join_by(date, stream, site_group)`
## Rows: 26,403
## Columns: 7
## Groups: stream, date, statistic, gage_agency, gage_number, site_group [26,403]
## $ date <date> 1999-12-31, 1999-12-31, 1999-12-31, 2000-01-01, 2000-01-0…
## $ stream <chr> "feather river", "feather river", "feather river", "feathe…
## $ site_group <chr> "upper feather hfc", "upper feather hfc", "upper feather h…
## $ gage_agency <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ gage_number <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ statistic <chr> "mean", "max", "min", "mean", "max", "min", "mean", "max",…
## $ value <dbl> 9.890990, 11.143629, 10.339793, 10.003578, 11.745711, 10.0…
ggplot() +
geom_line(data = feather_gap_hfc, aes(x = date, y = value, color = statistic)) +
theme_minimal()
## Warning: Removed 3 rows containing missing values (`geom_line()`).
Pull in gage data from YR7 CDEC gage, however, this only contains data from 2020 onwards. Temperature data collected along with RST data was originally used to fill the gap prior to 2020; however, due to inconsistencies in these data sources the resulting predicted mean values were lower than the min values as the RST data only has mean data.
Prepare datasets for regression analysis (dataset with no missing data to train and dataset with missing data to predict)
Plot of mean temp for Yuba River and Butte Creek
# plot for mean
ggplot(data = yuba_regression_data_mean, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of min temp for Yuba River and Butte Creek
# plot for mean
ggplot(data = yuba_regression_data_min, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Plot of max temp for Yuba River and Butte Creek
# plot for mean
ggplot(data = yuba_regression_data_max, aes(x = butte_temp, y = temp)) +
geom_point() +
stat_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# MEAN Regression
split <-rsample::initial_split(yuba_regression_data_mean, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
yuba_mod_mean <- lm(temp ~ date + butte_temp, data = train)
summary(yuba_mod_mean)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9019 -1.0516 0.0279 1.0812 3.1416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.092e+01 1.593e+00 13.129 <2e-16 ***
## date -7.705e-04 8.378e-05 -9.197 <2e-16 ***
## butte_temp 5.884e-01 6.879e-03 85.543 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.423 on 1297 degrees of freedom
## Multiple R-squared: 0.852, Adjusted R-squared: 0.8517
## F-statistic: 3732 on 2 and 1297 DF, p-value: < 2.2e-16
test_predict <- predict(yuba_mod_mean, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.08459931
# MIN Regression
split <-rsample::initial_split(yuba_regression_data_min, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
yuba_mod_min <- lm(temp ~ date + butte_temp, data = train)
summary(yuba_mod_min)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.648 -1.032 -0.009 1.053 3.257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.291e+01 1.601e+00 14.31 <2e-16 ***
## date -8.818e-04 8.412e-05 -10.48 <2e-16 ***
## butte_temp 5.401e-01 7.560e-03 71.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.419 on 1297 degrees of freedom
## Multiple R-squared: 0.8027, Adjusted R-squared: 0.8024
## F-statistic: 2638 on 2 and 1297 DF, p-value: < 2.2e-16
test_predict <- predict(yuba_mod_min, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.09698594
# MAX Regression
split <-rsample::initial_split(yuba_regression_data_max, prop = 0.8)
train <- rsample::training(split)
test <- rsample::testing(split)
yuba_mod_max <- lm(temp ~ date + butte_temp, data = train)
summary(yuba_mod_max)
##
## Call:
## lm(formula = temp ~ date + butte_temp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6824 -0.8997 0.0082 0.9982 6.2881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.889e+01 1.613e+00 11.713 < 2e-16 ***
## date -6.650e-04 8.466e-05 -7.855 8.34e-15 ***
## butte_temp 6.400e-01 6.234e-03 102.669 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.418 on 1297 degrees of freedom
## Multiple R-squared: 0.8924, Adjusted R-squared: 0.8922
## F-statistic: 5377 on 2 and 1297 DF, p-value: < 2.2e-16
test_predict <- predict(yuba_mod_max, test)
test_predict_df <- test |>
mutate(predicted = test_predict)
# evaluate predictions - MAPE of 10% is not bad
mean(abs((
test_predict_df$predicted - test_predict_df$temp
)) / test_predict_df$temp)
## [1] 0.07724969
# Predictions
yuba_gap_predicted_mean <- predict(yuba_mod_mean, yuba_gap_mean)
yuba_mean_predicted <- yuba_gap_mean |>
mutate(value = yuba_gap_predicted_mean,
statistic = "mean_predicted") |>
select(date, value, statistic)
ggplot(yuba_mean_predicted, aes(x = date, y = value)) +
geom_line()
## Warning: Removed 1 row containing missing values (`geom_line()`).
yuba_gap_predicted_min <- predict(yuba_mod_min, yuba_gap_min)
yuba_min_predicted <- yuba_gap_min |>
mutate(value = yuba_gap_predicted_min,
statistic = "min_predicted") |>
select(date, value, statistic)
ggplot(yuba_min_predicted, aes(x = date, y = value)) +
geom_line()
## Warning: Removed 1 row containing missing values (`geom_line()`).
yuba_gap_predicted_max <- predict(yuba_mod_max, yuba_gap_max)
yuba_max_predicted <- yuba_gap_max |>
mutate(value = yuba_gap_predicted_max,
statistic = "max_predicted") |>
select(date, value, statistic)
ggplot(yuba_max_predicted, aes(x = date, y = value)) +
geom_line()
## Warning: Removed 1 row containing missing values (`geom_line()`).
yuba_gap <- bind_rows(yuba_max_predicted,
yuba_mean_predicted,
yuba_min_predicted) |>
mutate(stream = "yuba river") |>
pivot_wider(id_cols = c(date, stream), values_from = "value", names_from = "statistic") |>
full_join(yuba_format |>
pivot_wider(id_cols = c(stream, date, gage_agency, gage_number), values_from = "value", names_from = "statistic")) |>
mutate(gage_agency = ifelse(is.na(mean), "interpolated", gage_agency),
gage_number = ifelse(is.na(mean), "interpolated", gage_number),
mean = ifelse(is.na(mean), mean_predicted, mean),
min = ifelse(is.na(min), min_predicted, min),
max = ifelse(is.na(max), max_predicted, max)) |>
select(-c(mean_predicted, max_predicted, min_predicted)) |>
pivot_longer(cols = mean:min, values_to = "value", names_to = "statistic") |>
group_by(stream, date, statistic, gage_agency, gage_number) |> glimpse()
## Joining with `by = join_by(date, stream)`
## Rows: 26,367
## Columns: 6
## Groups: stream, date, statistic, gage_agency, gage_number [26,367]
## $ date <date> 1999-12-31, 1999-12-31, 1999-12-31, 2000-01-01, 2000-01-0…
## $ stream <chr> "yuba river", "yuba river", "yuba river", "yuba river", "y…
## $ gage_agency <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ gage_number <chr> "interpolated", "interpolated", "interpolated", "interpola…
## $ statistic <chr> "mean", "max", "min", "mean", "max", "min", "mean", "max",…
## $ value <dbl> 15.30452, 14.67537, 15.83686, 15.40182, 15.31474, 15.56593…
ggplot() +
geom_line(data = yuba_gap, aes(x = date, y = value, color = statistic)) +
theme_minimal()
## Warning: Removed 3 rows containing missing values (`geom_line()`).